######################################################
# CreateKnowledgeVariables.R
# This script codes the answers that respondents provided on the test
# questions at the end of the experiment. The main output is 
# several variables summarizing the accuracy of responses.
# Note: this file requires the working directory to be set to "./Merge and Clean Data/".
# Note: some coordinate adjustments are commented out, since we cannot use coordinates
# without violating anonymity. Instead, we merge in the distance variables
# we use at a later point.
# Contact Ryan Jablonski, r.s.jablonski@lse.ac.uk with questions
#
# Log
# Created 2018
# Edited for APSR replication 17 August 2023 by Ryan Jablonski
######################################################

library(plyr)
library(readstata13)

# Clear every object (hidden ones included) from the workspace before loading.
rm(list = ls(all.names = TRUE))


# True data about each school shown on the test map
# (one file for the LC survey, one for the MP survey).
test.schools.lc <- read.csv(
  "./input/map_h_schools_lc.csv",
  stringsAsFactors = FALSE
)
test.schools.mp <- read.csv(
  "./input/map_h_schools_mp.csv",
  stringsAsFactors = FALSE
)

# Survey data: the "all" and "clean" files for the MP and C surveys.
mp.survey.all <- read.csv("./output/mp_all.csv", stringsAsFactors = FALSE)
mp.survey     <- read.csv("./output/mp_clean.csv", stringsAsFactors = FALSE)
c.survey.all  <- read.csv("./output/c_all.csv", stringsAsFactors = FALSE)
c.survey      <- read.csv("./output/c_clean.csv", stringsAsFactors = FALSE)

# Dataset of all education aid projects (provided by donors).
# The path uses forward slashes, like every other input in this script, so it
# resolves on Linux/macOS as well as Windows (the backslash form only works
# on Windows).
donorportfolios <- read.csv(
  "./input/Past Aid Interventions in Malawi Primary Schools with id.csv",
  stringsAsFactors = FALSE
)


# Attach the three donor-organisation columns to every test-map school by
# looking up its school_id in the donor portfolio file. match() uses the first
# matching portfolio row; schools without a match get NA in all three columns.
test.schools.lc[c("donor1", "donor2", "donor3")] <- donorportfolios[
  match(test.schools.lc$school_id, donorportfolios$school_id),
  c("Organisation1", "Organisation2", "Organisation3")
]

test.schools.mp[c("donor1", "donor2", "donor3")] <- donorportfolios[
  match(test.schools.mp$school_id, donorportfolios$school_id),
  c("Organisation1", "Organisation2", "Organisation3")
]


# Quick check: number of distinct wards / constituencies in the test-map files
# and in the full survey files. The counts are not stored anywhere.
sum(!duplicated(test.schools.lc$ps_ward_id))
sum(!duplicated(test.schools.mp$constituencyid))
sum(!duplicated(c.survey.all$ps_ward_id))
sum(!duplicated(mp.survey.all$constituencyid))

# Respondents may answer "about the same" to some questions. `testcut` is the
# relative difference between two schools at or below which that answer is
# coded as correct (i.e. we allow a 5% difference). This value matters very
# little.
testcut <- 0.05


# Set up the answer-key table for the LC surveys: one row per ward.
tests.lc <- data.frame(ps_ward_id = unique(test.schools.lc$ps_ward_id))

# Permanent classrooms: sort schools within each ward from fewest to most and
# number them, so positions 1, 2 and 3 identify the min, median and max school.
# NOTE(review): positions 1/2/3 presume three test schools per ward -- confirm.
test.schools.lc <- with(
  test.schools.lc,
  test.schools.lc[order(ps_ward_id, school_classrooms_permanent), ]
)
test.schools.lc$test_classes_order <- ave(
  rep(1, nrow(test.schools.lc)), test.schools.lc$ps_ward_id, FUN = seq_along
)

# The classroom count is kept only on the row holding each position (NA elsewhere).
test.schools.lc$test_classes_min <- with(
  test.schools.lc, ifelse(test_classes_order == 1, school_classrooms_permanent, NA)
)
test.schools.lc$test_classes_max <- with(
  test.schools.lc, ifelse(test_classes_order == 3, school_classrooms_permanent, NA)
)
test.schools.lc$test_classes_median <- with(
  test.schools.lc, ifelse(test_classes_order == 2, school_classrooms_permanent, NA)
)

# Copy each ward's min / median / max value into the ward-level table.
temp.df <- test.schools.lc[!is.na(test.schools.lc$test_classes_min), ]
tests.lc$test_classes_min <-
  temp.df[["test_classes_min"]][match(tests.lc$ps_ward_id, temp.df$ps_ward_id)]
temp.df <- test.schools.lc[!is.na(test.schools.lc$test_classes_median), ]
tests.lc$test_classes_median <-
  temp.df[["test_classes_median"]][match(tests.lc$ps_ward_id, temp.df$ps_ward_id)]
temp.df <- test.schools.lc[!is.na(test.schools.lc$test_classes_max), ]
tests.lc$test_classes_max <-
  temp.df[["test_classes_max"]][match(tests.lc$ps_ward_id, temp.df$ps_ward_id)]

# Map letter (A, B or C) of the school with the fewest classrooms.
test.schools.lc$min_classes_letters <- with(test.schools.lc, ifelse(
  test_classes_order == 1 & map_letter %in% c("A", "B", "C"),
  as.character(map_letter), NA
))
temp.df <- test.schools.lc[!is.na(test.schools.lc$min_classes_letters), ]
tests.lc$min_classes_letters <-
  temp.df[["min_classes_letters"]][match(tests.lc$ps_ward_id, temp.df$ps_ward_id)]


# Map letter of the school in the middle position.
test.schools.lc$median_classes_letters <- with(test.schools.lc, ifelse(
  test_classes_order == 2 & map_letter %in% c("A", "B", "C"),
  as.character(map_letter), NA
))
temp.df <- test.schools.lc[!is.na(test.schools.lc$median_classes_letters), ]
tests.lc$median_classes_letters <-
  temp.df[["median_classes_letters"]][match(tests.lc$ps_ward_id, temp.df$ps_ward_id)]

# Map letter of the school with the most classrooms.
test.schools.lc$max_classes_letters <- with(test.schools.lc, ifelse(
  test_classes_order == 3 & map_letter %in% c("A", "B", "C"),
  as.character(map_letter), NA
))
temp.df <- test.schools.lc[!is.na(test.schools.lc$max_classes_letters), ]
tests.lc$max_classes_letters <-
  temp.df[["max_classes_letters"]][match(tests.lc$ps_ward_id, temp.df$ps_ward_id)]

# Ties. When two neighbouring values are equal, or differ by no more than
# `testcut` as a share of the larger-ranked value, their letters are joined
# into one comma-separated string. The five steps below must run in this order:
# a string longer than one character marks a two-way tie, and a string longer
# than four characters (three letters plus two commas) marks a three-way tie.
tests.lc$min_classes_letters <- with(tests.lc, ifelse(
  test_classes_median - test_classes_min == 0 |
    (test_classes_median - test_classes_min) / test_classes_median <= testcut,
  paste(min_classes_letters, median_classes_letters, sep = ","),
  min_classes_letters
))
tests.lc$median_classes_letters <- with(tests.lc, ifelse(
  nchar(min_classes_letters) > 1, min_classes_letters, median_classes_letters
))
tests.lc$max_classes_letters <- with(tests.lc, ifelse(
  test_classes_max - test_classes_median == 0 |
    (test_classes_max - test_classes_median) / test_classes_max <= testcut,
  paste(max_classes_letters, median_classes_letters, sep = ","),
  max_classes_letters
))
tests.lc$median_classes_letters <- with(tests.lc, ifelse(
  nchar(max_classes_letters) > 1, max_classes_letters, median_classes_letters
))
tests.lc$min_classes_letters <- with(tests.lc, ifelse(
  nchar(max_classes_letters) > 4, max_classes_letters, min_classes_letters
))

# Enrollment ("most students"): sort schools within each ward by total
# enrollment and number them, so positions 1, 2 and 3 identify the min, median
# and max school.
# NOTE(review): positions 1/2/3 presume three test schools per ward -- confirm.
test.schools.lc <- with(
  test.schools.lc,
  test.schools.lc[order(ps_ward_id, school_enrollment_total), ]
)
test.schools.lc$test_enrollment_order <- ave(
  rep(1, nrow(test.schools.lc)), test.schools.lc$ps_ward_id, FUN = seq_along
)

# Enrollment is kept only on the row holding each position (NA elsewhere).
test.schools.lc$test_enrollment_min <- with(
  test.schools.lc, ifelse(test_enrollment_order == 1, school_enrollment_total, NA)
)
test.schools.lc$test_enrollment_max <- with(
  test.schools.lc, ifelse(test_enrollment_order == 3, school_enrollment_total, NA)
)
test.schools.lc$test_enrollment_median <- with(
  test.schools.lc, ifelse(test_enrollment_order == 2, school_enrollment_total, NA)
)

# Copy each ward's min / median / max value into the ward-level table.
temp.df <- test.schools.lc[!is.na(test.schools.lc$test_enrollment_min), ]
tests.lc$test_enrollment_min <-
  temp.df[["test_enrollment_min"]][match(tests.lc$ps_ward_id, temp.df$ps_ward_id)]
temp.df <- test.schools.lc[!is.na(test.schools.lc$test_enrollment_median), ]
tests.lc$test_enrollment_median <-
  temp.df[["test_enrollment_median"]][match(tests.lc$ps_ward_id, temp.df$ps_ward_id)]
temp.df <- test.schools.lc[!is.na(test.schools.lc$test_enrollment_max), ]
tests.lc$test_enrollment_max <-
  temp.df[["test_enrollment_max"]][match(tests.lc$ps_ward_id, temp.df$ps_ward_id)]

# Map letter (A, B or C) of the school with the lowest enrollment.
test.schools.lc$min_enrollment_letters <- with(test.schools.lc, ifelse(
  test_enrollment_order == 1 & map_letter %in% c("A", "B", "C"),
  as.character(map_letter), NA
))
temp.df <- test.schools.lc[!is.na(test.schools.lc$min_enrollment_letters), ]
tests.lc$min_enrollment_letters <-
  temp.df[["min_enrollment_letters"]][match(tests.lc$ps_ward_id, temp.df$ps_ward_id)]


# Map letter of the school in the middle position.
test.schools.lc$median_enrollment_letters <- with(test.schools.lc, ifelse(
  test_enrollment_order == 2 & map_letter %in% c("A", "B", "C"),
  as.character(map_letter), NA
))
temp.df <- test.schools.lc[!is.na(test.schools.lc$median_enrollment_letters), ]
tests.lc$median_enrollment_letters <-
  temp.df[["median_enrollment_letters"]][match(tests.lc$ps_ward_id, temp.df$ps_ward_id)]

# Map letter of the school with the highest enrollment.
test.schools.lc$max_enrollment_letters <- with(test.schools.lc, ifelse(
  test_enrollment_order == 3 & map_letter %in% c("A", "B", "C"),
  as.character(map_letter), NA
))
temp.df <- test.schools.lc[!is.na(test.schools.lc$max_enrollment_letters), ]
tests.lc$max_enrollment_letters <-
  temp.df[["max_enrollment_letters"]][match(tests.lc$ps_ward_id, temp.df$ps_ward_id)]

# Ties. When two neighbouring values are equal, or differ by no more than
# `testcut` as a share of the larger-ranked value, their letters are joined
# into one comma-separated string. The five steps below must run in this order:
# a string longer than one character marks a two-way tie, and a string longer
# than four characters (three letters plus two commas) marks a three-way tie.
tests.lc$min_enrollment_letters <- with(tests.lc, ifelse(
  test_enrollment_median - test_enrollment_min == 0 |
    (test_enrollment_median - test_enrollment_min) / test_enrollment_median <= testcut,
  paste(min_enrollment_letters, median_enrollment_letters, sep = ","),
  min_enrollment_letters
))
tests.lc$median_enrollment_letters <- with(tests.lc, ifelse(
  nchar(min_enrollment_letters) > 1, min_enrollment_letters, median_enrollment_letters
))
tests.lc$max_enrollment_letters <- with(tests.lc, ifelse(
  test_enrollment_max - test_enrollment_median == 0 |
    (test_enrollment_max - test_enrollment_median) / test_enrollment_max <= testcut,
  paste(max_enrollment_letters, median_enrollment_letters, sep = ","),
  max_enrollment_letters
))
tests.lc$median_enrollment_letters <- with(tests.lc, ifelse(
  nchar(max_enrollment_letters) > 1, max_enrollment_letters, median_enrollment_letters
))
tests.lc$min_enrollment_letters <- with(tests.lc, ifelse(
  nchar(max_enrollment_letters) > 4, max_enrollment_letters, min_enrollment_letters
))


# Support in the election: sort schools within each ward by
# winner_percent_local and number them, so positions 1, 2 and 3 identify the
# min, median and max school.
# NOTE(review): positions 1/2/3 presume three test schools per ward -- confirm.
test.schools.lc <- with(
  test.schools.lc,
  test.schools.lc[order(ps_ward_id, winner_percent_local), ]
)


test.schools.lc$test_percentvotes_order <- ave(
  rep(1, nrow(test.schools.lc)), test.schools.lc$ps_ward_id, FUN = seq_along
)

# The vote share is kept only on the row holding each position (NA elsewhere).
test.schools.lc$test_percentvotes_min <- with(
  test.schools.lc, ifelse(test_percentvotes_order == 1, winner_percent_local, NA)
)
test.schools.lc$test_percentvotes_max <- with(
  test.schools.lc, ifelse(test_percentvotes_order == 3, winner_percent_local, NA)
)
test.schools.lc$test_percentvotes_median <- with(
  test.schools.lc, ifelse(test_percentvotes_order == 2, winner_percent_local, NA)
)

# Copy each ward's min / median / max value into the ward-level table.
temp.df <- test.schools.lc[!is.na(test.schools.lc$test_percentvotes_min), ]
tests.lc$test_percentvotes_min <-
  temp.df[["test_percentvotes_min"]][match(tests.lc$ps_ward_id, temp.df$ps_ward_id)]
temp.df <- test.schools.lc[!is.na(test.schools.lc$test_percentvotes_median), ]
tests.lc$test_percentvotes_median <-
  temp.df[["test_percentvotes_median"]][match(tests.lc$ps_ward_id, temp.df$ps_ward_id)]
temp.df <- test.schools.lc[!is.na(test.schools.lc$test_percentvotes_max), ]
tests.lc$test_percentvotes_max <-
  temp.df[["test_percentvotes_max"]][match(tests.lc$ps_ward_id, temp.df$ps_ward_id)]


# Map letter (A, B or C) of the school with the lowest vote share.
test.schools.lc$min_percentvotes_letters <- with(test.schools.lc, ifelse(
  test_percentvotes_order == 1 & map_letter %in% c("A", "B", "C"),
  as.character(map_letter), NA
))
temp.df <- test.schools.lc[!is.na(test.schools.lc$min_percentvotes_letters), ]
tests.lc$min_percentvotes_letters <-
  temp.df[["min_percentvotes_letters"]][match(tests.lc$ps_ward_id, temp.df$ps_ward_id)]


# Map letter of the school in the middle position.
test.schools.lc$median_percentvotes_letters <- with(test.schools.lc, ifelse(
  test_percentvotes_order == 2 & map_letter %in% c("A", "B", "C"),
  as.character(map_letter), NA
))
temp.df <- test.schools.lc[!is.na(test.schools.lc$median_percentvotes_letters), ]
tests.lc$median_percentvotes_letters <-
  temp.df[["median_percentvotes_letters"]][match(tests.lc$ps_ward_id, temp.df$ps_ward_id)]

# Map letter of the school with the highest vote share.
test.schools.lc$max_percentvotes_letters <- with(test.schools.lc, ifelse(
  test_percentvotes_order == 3 & map_letter %in% c("A", "B", "C"),
  as.character(map_letter), NA
))
temp.df <- test.schools.lc[!is.na(test.schools.lc$max_percentvotes_letters), ]
tests.lc$max_percentvotes_letters <-
  temp.df[["max_percentvotes_letters"]][match(tests.lc$ps_ward_id, temp.df$ps_ward_id)]

# Ties. When two neighbouring values are equal, or differ by no more than
# `testcut` as a share of the larger-ranked value, their letters are joined
# into one comma-separated string. The five steps below must run in this order:
# a string longer than one character marks a two-way tie, and a string longer
# than four characters (three letters plus two commas) marks a three-way tie.
tests.lc$min_percentvotes_letters <- with(tests.lc, ifelse(
  test_percentvotes_median - test_percentvotes_min == 0 |
    (test_percentvotes_median - test_percentvotes_min) / test_percentvotes_median <= testcut,
  paste(min_percentvotes_letters, median_percentvotes_letters, sep = ","),
  min_percentvotes_letters
))
tests.lc$median_percentvotes_letters <- with(tests.lc, ifelse(
  nchar(min_percentvotes_letters) > 1, min_percentvotes_letters, median_percentvotes_letters
))
tests.lc$max_percentvotes_letters <- with(tests.lc, ifelse(
  test_percentvotes_max - test_percentvotes_median == 0 |
    (test_percentvotes_max - test_percentvotes_median) / test_percentvotes_max <= testcut,
  paste(max_percentvotes_letters, median_percentvotes_letters, sep = ","),
  max_percentvotes_letters
))
tests.lc$median_percentvotes_letters <- with(tests.lc, ifelse(
  nchar(max_percentvotes_letters) > 1, max_percentvotes_letters, median_percentvotes_letters
))
tests.lc$min_percentvotes_letters <- with(tests.lc, ifelse(
  nchar(max_percentvotes_letters) > 4, max_percentvotes_letters, min_percentvotes_letters
))



# Aid projects: sort schools within each ward by past_aid_project and number
# them, so positions 1, 2 and 3 identify the min, median and max school.
# NOTE(review): positions 1/2/3 presume three test schools per ward -- confirm.
test.schools.lc <- with(
  test.schools.lc,
  test.schools.lc[order(ps_ward_id, past_aid_project), ]
)
test.schools.lc$test_projects_order <- ave(
  rep(1, nrow(test.schools.lc)), test.schools.lc$ps_ward_id, FUN = seq_along
)

# The project value is kept only on the row holding each position (NA elsewhere).
test.schools.lc$test_projects_min <- with(
  test.schools.lc, ifelse(test_projects_order == 1, past_aid_project, NA)
)
test.schools.lc$test_projects_max <- with(
  test.schools.lc, ifelse(test_projects_order == 3, past_aid_project, NA)
)
test.schools.lc$test_projects_median <- with(
  test.schools.lc, ifelse(test_projects_order == 2, past_aid_project, NA)
)

# Copy each ward's min / median / max value into the ward-level table.
temp.df <- test.schools.lc[!is.na(test.schools.lc$test_projects_min), ]
tests.lc$test_projects_min <-
  temp.df[["test_projects_min"]][match(tests.lc$ps_ward_id, temp.df$ps_ward_id)]
temp.df <- test.schools.lc[!is.na(test.schools.lc$test_projects_median), ]
tests.lc$test_projects_median <-
  temp.df[["test_projects_median"]][match(tests.lc$ps_ward_id, temp.df$ps_ward_id)]
temp.df <- test.schools.lc[!is.na(test.schools.lc$test_projects_max), ]
tests.lc$test_projects_max <-
  temp.df[["test_projects_max"]][match(tests.lc$ps_ward_id, temp.df$ps_ward_id)]

# Map letter (A, B or C) of the school in the lowest position.
test.schools.lc$min_projects_letters <- with(test.schools.lc, ifelse(
  test_projects_order == 1 & map_letter %in% c("A", "B", "C"),
  as.character(map_letter), NA
))
temp.df <- test.schools.lc[!is.na(test.schools.lc$min_projects_letters), ]
tests.lc$min_projects_letters <-
  temp.df[["min_projects_letters"]][match(tests.lc$ps_ward_id, temp.df$ps_ward_id)]


# Map letter of the school in the middle position.
test.schools.lc$median_projects_letters <- with(test.schools.lc, ifelse(
  test_projects_order == 2 & map_letter %in% c("A", "B", "C"),
  as.character(map_letter), NA
))
temp.df <- test.schools.lc[!is.na(test.schools.lc$median_projects_letters), ]
tests.lc$median_projects_letters <-
  temp.df[["median_projects_letters"]][match(tests.lc$ps_ward_id, temp.df$ps_ward_id)]

# Map letter of the school in the highest position.
test.schools.lc$max_projects_letters <- with(test.schools.lc, ifelse(
  test_projects_order == 3 & map_letter %in% c("A", "B", "C"),
  as.character(map_letter), NA
))
temp.df <- test.schools.lc[!is.na(test.schools.lc$max_projects_letters), ]
tests.lc$max_projects_letters <-
  temp.df[["max_projects_letters"]][match(tests.lc$ps_ward_id, temp.df$ps_ward_id)]

# Ties. When two neighbouring values are equal, or differ by no more than
# `testcut` as a share of the larger-ranked value, their letters are joined
# into one comma-separated string. The five steps below must run in this order:
# a string longer than one character marks a two-way tie, and a string longer
# than four characters (three letters plus two commas) marks a three-way tie.
tests.lc$min_projects_letters <- with(tests.lc, ifelse(
  test_projects_median - test_projects_min == 0 |
    (test_projects_median - test_projects_min) / test_projects_median <= testcut,
  paste(min_projects_letters, median_projects_letters, sep = ","),
  min_projects_letters
))
tests.lc$median_projects_letters <- with(tests.lc, ifelse(
  nchar(min_projects_letters) > 1, min_projects_letters, median_projects_letters
))
tests.lc$max_projects_letters <- with(tests.lc, ifelse(
  test_projects_max - test_projects_median == 0 |
    (test_projects_max - test_projects_median) / test_projects_max <= testcut,
  paste(max_projects_letters, median_projects_letters, sep = ","),
  max_projects_letters
))
tests.lc$median_projects_letters <- with(tests.lc, ifelse(
  nchar(max_projects_letters) > 1, max_projects_letters, median_projects_letters
))
tests.lc$min_projects_letters <- with(tests.lc, ifelse(
  nchar(max_projects_letters) > 4, max_projects_letters, min_projects_letters
))


# Set up the answer-key table for the MP surveys: one row per constituency.
tests.mp <- data.frame(constituencyid = unique(test.schools.mp$constituencyid))

# Permanent classrooms: sort schools within each constituency from fewest to
# most and number them, so positions 1, 2 and 3 identify the min, median and
# max school.
# NOTE(review): positions 1/2/3 presume three test schools per constituency -- confirm.
test.schools.mp <- with(
  test.schools.mp,
  test.schools.mp[order(constituencyid, school_classrooms_permanent), ]
)


test.schools.mp$test_classes_order <- ave(
  rep(1, nrow(test.schools.mp)), test.schools.mp$constituencyid, FUN = seq_along
)

# The classroom count is kept only on the row holding each position (NA elsewhere).
test.schools.mp$test_classes_min <- with(
  test.schools.mp, ifelse(test_classes_order == 1, school_classrooms_permanent, NA)
)
test.schools.mp$test_classes_max <- with(
  test.schools.mp, ifelse(test_classes_order == 3, school_classrooms_permanent, NA)
)
test.schools.mp$test_classes_median <- with(
  test.schools.mp, ifelse(test_classes_order == 2, school_classrooms_permanent, NA)
)

# Copy each constituency's min / median / max value into the constituency-level table.
temp.df <- test.schools.mp[!is.na(test.schools.mp$test_classes_min), ]
tests.mp$test_classes_min <-
  temp.df[["test_classes_min"]][match(tests.mp$constituencyid, temp.df$constituencyid)]
temp.df <- test.schools.mp[!is.na(test.schools.mp$test_classes_median), ]
tests.mp$test_classes_median <-
  temp.df[["test_classes_median"]][match(tests.mp$constituencyid, temp.df$constituencyid)]
temp.df <- test.schools.mp[!is.na(test.schools.mp$test_classes_max), ]
tests.mp$test_classes_max <-
  temp.df[["test_classes_max"]][match(tests.mp$constituencyid, temp.df$constituencyid)]

# Map letter (A, B or C) of the school with the fewest classrooms.
test.schools.mp$min_classes_letters <- with(test.schools.mp, ifelse(
  test_classes_order == 1 & map_letter %in% c("A", "B", "C"),
  as.character(map_letter), NA
))
temp.df <- test.schools.mp[!is.na(test.schools.mp$min_classes_letters), ]
tests.mp$min_classes_letters <-
  temp.df[["min_classes_letters"]][match(tests.mp$constituencyid, temp.df$constituencyid)]


# Map letter of the school in the middle position.
test.schools.mp$median_classes_letters <- with(test.schools.mp, ifelse(
  test_classes_order == 2 & map_letter %in% c("A", "B", "C"),
  as.character(map_letter), NA
))
temp.df <- test.schools.mp[!is.na(test.schools.mp$median_classes_letters), ]
tests.mp$median_classes_letters <-
  temp.df[["median_classes_letters"]][match(tests.mp$constituencyid, temp.df$constituencyid)]

# Map letter of the school with the most classrooms.
test.schools.mp$max_classes_letters <- with(test.schools.mp, ifelse(
  test_classes_order == 3 & map_letter %in% c("A", "B", "C"),
  as.character(map_letter), NA
))
temp.df <- test.schools.mp[!is.na(test.schools.mp$max_classes_letters), ]
tests.mp$max_classes_letters <-
  temp.df[["max_classes_letters"]][match(tests.mp$constituencyid, temp.df$constituencyid)]

# Ties. When two neighbouring values are equal, or differ by no more than
# `testcut` as a share of the larger-ranked value, their letters are joined
# into one comma-separated string. The five steps below must run in this order:
# a string longer than one character marks a two-way tie, and a string longer
# than four characters (three letters plus two commas) marks a three-way tie.
tests.mp$min_classes_letters <- with(tests.mp, ifelse(
  test_classes_median - test_classes_min == 0 |
    (test_classes_median - test_classes_min) / test_classes_median <= testcut,
  paste(min_classes_letters, median_classes_letters, sep = ","),
  min_classes_letters
))
tests.mp$median_classes_letters <- with(tests.mp, ifelse(
  nchar(min_classes_letters) > 1, min_classes_letters, median_classes_letters
))
tests.mp$max_classes_letters <- with(tests.mp, ifelse(
  test_classes_max - test_classes_median == 0 |
    (test_classes_max - test_classes_median) / test_classes_max <= testcut,
  paste(max_classes_letters, median_classes_letters, sep = ","),
  max_classes_letters
))
tests.mp$median_classes_letters <- with(tests.mp, ifelse(
  nchar(max_classes_letters) > 1, max_classes_letters, median_classes_letters
))
tests.mp$min_classes_letters <- with(tests.mp, ifelse(
  nchar(max_classes_letters) > 4, max_classes_letters, min_classes_letters
))

# Enrollment ("most students"): sort schools within each constituency by total
# enrollment and number them, so positions 1, 2 and 3 identify the min, median
# and max school.
# NOTE(review): positions 1/2/3 presume three test schools per constituency -- confirm.
test.schools.mp <- with(
  test.schools.mp,
  test.schools.mp[order(constituencyid, school_enrollment_total), ]
)
test.schools.mp$test_enrollment_order <- ave(
  rep(1, nrow(test.schools.mp)), test.schools.mp$constituencyid, FUN = seq_along
)

# Enrollment is kept only on the row holding each position (NA elsewhere).
test.schools.mp$test_enrollment_min <- with(
  test.schools.mp, ifelse(test_enrollment_order == 1, school_enrollment_total, NA)
)
test.schools.mp$test_enrollment_max <- with(
  test.schools.mp, ifelse(test_enrollment_order == 3, school_enrollment_total, NA)
)
test.schools.mp$test_enrollment_median <- with(
  test.schools.mp, ifelse(test_enrollment_order == 2, school_enrollment_total, NA)
)

# Copy each constituency's min / median / max value into the constituency-level table.
temp.df <- test.schools.mp[!is.na(test.schools.mp$test_enrollment_min), ]
tests.mp$test_enrollment_min <-
  temp.df[["test_enrollment_min"]][match(tests.mp$constituencyid, temp.df$constituencyid)]
temp.df <- test.schools.mp[!is.na(test.schools.mp$test_enrollment_median), ]
tests.mp$test_enrollment_median <-
  temp.df[["test_enrollment_median"]][match(tests.mp$constituencyid, temp.df$constituencyid)]
temp.df <- test.schools.mp[!is.na(test.schools.mp$test_enrollment_max), ]
tests.mp$test_enrollment_max <-
  temp.df[["test_enrollment_max"]][match(tests.mp$constituencyid, temp.df$constituencyid)]

# Map letter (A, B or C) of the school with the lowest enrollment.
test.schools.mp$min_enrollment_letters <- with(test.schools.mp, ifelse(
  test_enrollment_order == 1 & map_letter %in% c("A", "B", "C"),
  as.character(map_letter), NA
))
temp.df <- test.schools.mp[!is.na(test.schools.mp$min_enrollment_letters), ]
tests.mp$min_enrollment_letters <-
  temp.df[["min_enrollment_letters"]][match(tests.mp$constituencyid, temp.df$constituencyid)]


# Map letter of the school in the middle position.
test.schools.mp$median_enrollment_letters <- with(test.schools.mp, ifelse(
  test_enrollment_order == 2 & map_letter %in% c("A", "B", "C"),
  as.character(map_letter), NA
))
temp.df <- test.schools.mp[!is.na(test.schools.mp$median_enrollment_letters), ]
tests.mp$median_enrollment_letters <-
  temp.df[["median_enrollment_letters"]][match(tests.mp$constituencyid, temp.df$constituencyid)]

# Map letter of the school with the highest enrollment.
test.schools.mp$max_enrollment_letters <- with(test.schools.mp, ifelse(
  test_enrollment_order == 3 & map_letter %in% c("A", "B", "C"),
  as.character(map_letter), NA
))
temp.df <- test.schools.mp[!is.na(test.schools.mp$max_enrollment_letters), ]
tests.mp$max_enrollment_letters <-
  temp.df[["max_enrollment_letters"]][match(tests.mp$constituencyid, temp.df$constituencyid)]

# Ties. When two neighbouring values are equal, or differ by no more than
# `testcut` as a share of the larger-ranked value, their letters are joined
# into one comma-separated string. The five steps below must run in this order:
# a string longer than one character marks a two-way tie, and a string longer
# than four characters (three letters plus two commas) marks a three-way tie.
tests.mp$min_enrollment_letters <- with(tests.mp, ifelse(
  test_enrollment_median - test_enrollment_min == 0 |
    (test_enrollment_median - test_enrollment_min) / test_enrollment_median <= testcut,
  paste(min_enrollment_letters, median_enrollment_letters, sep = ","),
  min_enrollment_letters
))
tests.mp$median_enrollment_letters <- with(tests.mp, ifelse(
  nchar(min_enrollment_letters) > 1, min_enrollment_letters, median_enrollment_letters
))
tests.mp$max_enrollment_letters <- with(tests.mp, ifelse(
  test_enrollment_max - test_enrollment_median == 0 |
    (test_enrollment_max - test_enrollment_median) / test_enrollment_max <= testcut,
  paste(max_enrollment_letters, median_enrollment_letters, sep = ","),
  max_enrollment_letters
))
tests.mp$median_enrollment_letters <- with(tests.mp, ifelse(
  nchar(max_enrollment_letters) > 1, max_enrollment_letters, median_enrollment_letters
))
tests.mp$min_enrollment_letters <- with(tests.mp, ifelse(
  nchar(max_enrollment_letters) > 4, max_enrollment_letters, min_enrollment_letters
))


# Support in the election: sort schools within each constituency by
# winner_percent_parliamentary and number them, so positions 1, 2 and 3
# identify the min, median and max school.
# NOTE(review): positions 1/2/3 presume three test schools per constituency -- confirm.
test.schools.mp <- with(
  test.schools.mp,
  test.schools.mp[order(constituencyid, winner_percent_parliamentary), ]
)

test.schools.mp$test_percentvotes_order <- ave(
  rep(1, nrow(test.schools.mp)), test.schools.mp$constituencyid, FUN = seq_along
)

# The vote share is kept only on the row holding each position (NA elsewhere).
test.schools.mp$test_percentvotes_min <- with(
  test.schools.mp, ifelse(test_percentvotes_order == 1, winner_percent_parliamentary, NA)
)
test.schools.mp$test_percentvotes_max <- with(
  test.schools.mp, ifelse(test_percentvotes_order == 3, winner_percent_parliamentary, NA)
)
test.schools.mp$test_percentvotes_median <- with(
  test.schools.mp, ifelse(test_percentvotes_order == 2, winner_percent_parliamentary, NA)
)

# Copy each constituency's min / median / max value into the constituency-level table.
temp.df <- test.schools.mp[!is.na(test.schools.mp$test_percentvotes_min), ]
tests.mp$test_percentvotes_min <-
  temp.df[["test_percentvotes_min"]][match(tests.mp$constituencyid, temp.df$constituencyid)]
temp.df <- test.schools.mp[!is.na(test.schools.mp$test_percentvotes_median), ]
tests.mp$test_percentvotes_median <-
  temp.df[["test_percentvotes_median"]][match(tests.mp$constituencyid, temp.df$constituencyid)]
temp.df <- test.schools.mp[!is.na(test.schools.mp$test_percentvotes_max), ]
tests.mp$test_percentvotes_max <-
  temp.df[["test_percentvotes_max"]][match(tests.mp$constituencyid, temp.df$constituencyid)]

# Map letter (A, B or C) of the school with the lowest vote share.
test.schools.mp$min_percentvotes_letters <- with(test.schools.mp, ifelse(
  test_percentvotes_order == 1 & map_letter %in% c("A", "B", "C"),
  as.character(map_letter), NA
))
temp.df <- test.schools.mp[!is.na(test.schools.mp$min_percentvotes_letters), ]
tests.mp$min_percentvotes_letters <-
  temp.df[["min_percentvotes_letters"]][match(tests.mp$constituencyid, temp.df$constituencyid)]


# Map letter of the school in the middle position.
test.schools.mp$median_percentvotes_letters <- with(test.schools.mp, ifelse(
  test_percentvotes_order == 2 & map_letter %in% c("A", "B", "C"),
  as.character(map_letter), NA
))
temp.df <- test.schools.mp[!is.na(test.schools.mp$median_percentvotes_letters), ]
tests.mp$median_percentvotes_letters <-
  temp.df[["median_percentvotes_letters"]][match(tests.mp$constituencyid, temp.df$constituencyid)]

# Map letter of the school with the highest vote share.
test.schools.mp$max_percentvotes_letters <- with(test.schools.mp, ifelse(
  test_percentvotes_order == 3 & map_letter %in% c("A", "B", "C"),
  as.character(map_letter), NA
))
temp.df <- test.schools.mp[!is.na(test.schools.mp$max_percentvotes_letters), ]
tests.mp$max_percentvotes_letters <-
  temp.df[["max_percentvotes_letters"]][match(tests.mp$constituencyid, temp.df$constituencyid)]

# Ties. When two neighbouring values are equal, or differ by no more than
# `testcut` as a share of the larger-ranked value, their letters are joined
# into one comma-separated string. The five steps below must run in this order:
# a string longer than one character marks a two-way tie, and a string longer
# than four characters (three letters plus two commas) marks a three-way tie.
tests.mp$min_percentvotes_letters <- with(tests.mp, ifelse(
  test_percentvotes_median - test_percentvotes_min == 0 |
    (test_percentvotes_median - test_percentvotes_min) / test_percentvotes_median <= testcut,
  paste(min_percentvotes_letters, median_percentvotes_letters, sep = ","),
  min_percentvotes_letters
))
tests.mp$median_percentvotes_letters <- with(tests.mp, ifelse(
  nchar(min_percentvotes_letters) > 1, min_percentvotes_letters, median_percentvotes_letters
))
tests.mp$max_percentvotes_letters <- with(tests.mp, ifelse(
  test_percentvotes_max - test_percentvotes_median == 0 |
    (test_percentvotes_max - test_percentvotes_median) / test_percentvotes_max <= testcut,
  paste(max_percentvotes_letters, median_percentvotes_letters, sep = ","),
  max_percentvotes_letters
))
tests.mp$median_percentvotes_letters <- with(tests.mp, ifelse(
  nchar(max_percentvotes_letters) > 1, max_percentvotes_letters, median_percentvotes_letters
))
tests.mp$min_percentvotes_letters <- with(tests.mp, ifelse(
  nchar(max_percentvotes_letters) > 4, max_percentvotes_letters, min_percentvotes_letters
))



#create variables for aid projects
# Sort schools by constituency and, within constituency, by number of past aid
# projects (ascending), then number the rows 1, 2, 3, ... inside each constituency.
test.schools.mp <- test.schools.mp[order(test.schools.mp$constituencyid, test.schools.mp$past_aid_project), ]
test.schools.mp$test_projects_order <- ave(rep(1, nrow(test.schools.mp)),
                                           test.schools.mp$constituencyid,
                                           FUN = seq_along)

# Rank 1 carries the constituency minimum, rank 3 the maximum and rank 2 the
# median; all other rows get NA for the respective column.
test.schools.mp[c("test_projects_min", "test_projects_max", "test_projects_median")] <- lapply(
  c(1, 3, 2),
  function(rk) ifelse(test.schools.mp$test_projects_order == rk, test.schools.mp$past_aid_project, NA)
)

# Copy each statistic onto tests.mp by constituency. temp.df holds only the
# school rows on which the statistic being copied is defined.
temp.df <- test.schools.mp[!is.na(test.schools.mp$test_projects_min), ]
tests.mp$test_projects_min <- temp.df$test_projects_min[match(tests.mp$constituencyid, temp.df$constituencyid)]
temp.df <- test.schools.mp[!is.na(test.schools.mp$test_projects_median), ]
tests.mp$test_projects_median <- temp.df$test_projects_median[match(tests.mp$constituencyid, temp.df$constituencyid)]
temp.df <- test.schools.mp[!is.na(test.schools.mp$test_projects_max), ]
tests.mp$test_projects_max <- temp.df$test_projects_max[match(tests.mp$constituencyid, temp.df$constituencyid)]

# Map letter (A, B or C) of the school at rank 1 (min), 2 (median) and 3 (max);
# NA on every other row.
test.schools.mp[c("min_projects_letters", "median_projects_letters", "max_projects_letters")] <- lapply(
  1:3,
  function(rk) {
    letter <- as.character(test.schools.mp$map_letter)
    ifelse(test.schools.mp$test_projects_order == rk & letter %in% c("A", "B", "C"), letter, NA)
  }
)
temp.df <- test.schools.mp[!is.na(test.schools.mp$min_projects_letters), ]
tests.mp$min_projects_letters <- temp.df$min_projects_letters[match(tests.mp$constituencyid, temp.df$constituencyid)]
temp.df <- test.schools.mp[!is.na(test.schools.mp$median_projects_letters), ]
tests.mp$median_projects_letters <- temp.df$median_projects_letters[match(tests.mp$constituencyid, temp.df$constituencyid)]
temp.df <- test.schools.mp[!is.na(test.schools.mp$max_projects_letters), ]
tests.mp$max_projects_letters <- temp.df$max_projects_letters[match(tests.mp$constituencyid, temp.df$constituencyid)]

# Merge letters of tied schools. Two values are tied when they are equal or
# their relative gap is at most testcut.
# Step 1: min vs median -- a tie appends the median letter to the min letters.
tests.mp$min_projects_letters <- local({
  gap <- tests.mp$test_projects_median - tests.mp$test_projects_min
  tied <- gap == 0 | gap / tests.mp$test_projects_median <= testcut
  ifelse(tied,
         paste(tests.mp$min_projects_letters, tests.mp$median_projects_letters, sep = ","),
         tests.mp$min_projects_letters)
})
tests.mp$median_projects_letters <- ifelse(nchar(tests.mp$min_projects_letters) > 1,
                                           tests.mp$min_projects_letters,
                                           tests.mp$median_projects_letters)
# Step 2: max vs median -- a tie appends the (possibly merged) median letters.
tests.mp$max_projects_letters <- local({
  gap <- tests.mp$test_projects_max - tests.mp$test_projects_median
  tied <- gap == 0 | gap / tests.mp$test_projects_max <= testcut
  ifelse(tied,
         paste(tests.mp$max_projects_letters, tests.mp$median_projects_letters, sep = ","),
         tests.mp$max_projects_letters)
})
tests.mp$median_projects_letters <- ifelse(nchar(tests.mp$max_projects_letters) > 1,
                                           tests.mp$max_projects_letters,
                                           tests.mp$median_projects_letters)
# More than 4 characters means all three letters are listed, so min shares them too.
tests.mp$min_projects_letters <- ifelse(nchar(tests.mp$max_projects_letters) > 4,
                                        tests.mp$max_projects_letters,
                                        tests.mp$min_projects_letters)

#Get donors associated with the knowledge map
# One subset of the ward-level test schools per map letter; match() below picks
# the first school of each letter found for a ward.
test.schools.lcA <- test.schools.lc[test.schools.lc$map_letter=="A",]
test.schools.lcB <- test.schools.lc[test.schools.lc$map_letter=="B",]
test.schools.lcC <- test.schools.lc[test.schools.lc$map_letter=="C",]

# Build one comma-separated string per ward holding up to nine donor names
# (donor1-donor3 for each of schools A, B and C). Slot order is kept as
# C3,B3,A3,C2,B2,A2,C1,B1,A1. Missing donors become empty slots.
# Missing values are blanked BEFORE pasting: removing the text "NA" from the
# pasted string afterwards would also strip "NA" out of real donor names.
tests.lc$test_donors <- local({
  # Donor in column `slot` of the ward's school in `schools`; "" when absent.
  donor.of <- function(schools, slot) {
    found <- as.character(schools[match(tests.lc$ps_ward_id, schools$ps_ward_id), slot])
    found[is.na(found)] <- ""
    found
  }
  paste(donor.of(test.schools.lcC, "donor3"), donor.of(test.schools.lcB, "donor3"), donor.of(test.schools.lcA, "donor3"),
        donor.of(test.schools.lcC, "donor2"), donor.of(test.schools.lcB, "donor2"), donor.of(test.schools.lcA, "donor2"),
        donor.of(test.schools.lcC, "donor1"), donor.of(test.schools.lcB, "donor1"), donor.of(test.schools.lcA, "donor1"),
        sep = ",")
})


#Get donors associated with the knowledge map
# One subset of the constituency-level test schools per map letter; match()
# below picks the first school of each letter found for a constituency.
test.schools.mpA <- test.schools.mp[test.schools.mp$map_letter=="A",]
test.schools.mpB <- test.schools.mp[test.schools.mp$map_letter=="B",]
test.schools.mpC <- test.schools.mp[test.schools.mp$map_letter=="C",]

# Build one comma-separated string per constituency holding up to nine donor
# names (donor1-donor3 for each of schools A, B and C). Slot order is kept as
# C3,B3,A3,C2,B2,A2,C1,B1,A1. Missing donors become empty slots.
# Missing values are blanked BEFORE pasting: removing the text "NA" from the
# pasted string afterwards would also strip "NA" out of real donor names.
tests.mp$test_donors <- local({
  # Donor in column `slot` of the constituency's school in `schools`; "" when absent.
  donor.of <- function(schools, slot) {
    found <- as.character(schools[match(tests.mp$constituencyid, schools$constituencyid), slot])
    found[is.na(found)] <- ""
    found
  }
  paste(donor.of(test.schools.mpC, "donor3"), donor.of(test.schools.mpB, "donor3"), donor.of(test.schools.mpA, "donor3"),
        donor.of(test.schools.mpC, "donor2"), donor.of(test.schools.mpB, "donor2"), donor.of(test.schools.mpA, "donor2"),
        donor.of(test.schools.mpC, "donor1"), donor.of(test.schools.mpB, "donor1"), donor.of(test.schools.mpA, "donor1"),
        sep = ",")
})
#this.df=merge(c.survey.all, tests.lc, by="ps_ward_id")
#this.df$test_donors 


# Score the knowledge-test answers in each survey data frame. The loop works on
# a copy (this.df) and writes it back under the same name at the end.
for(this.dfname in c("mp.survey", "c.survey", "mp.survey.all", "c.survey.all")){
#  for(this.dfname in c("c.survey.all")){

  # Fetch the data frame by name; get() does this without parsing code from a string.
  this.df <- get(this.dfname)
  # Attach the true test-map values: the c.* surveys join on ward, the mp.*
  # surveys on constituency. merge() keeps only rows with a match on the key.
  if (this.dfname %in% c("c.survey", "c.survey.all")) {
    this.df <- merge(this.df, tests.lc, by = "ps_ward_id")
  } else {
    this.df <- merge(this.df, tests.mp, by = "constituencyid")
  }
  
  
  #equals TRUE if the respondent correctly selected the school with the least number of permanent classrooms
  this.df$test_least_classes <- local({
    answer <- this.df$aid_35
    truth <- this.df$min_classes_letters
    # Fewer than 5 characters: at most two letters are listed, so a single
    # school answer can be right if its letter is among them.
    some.differ <- nchar(truth) < 5
    score <- FALSE
    for (ltr in c("A", "B", "C")) {
      score <- ifelse(answer == paste("School", ltr) & some.differ, grepl(ltr, truth), score)
    }
    # Exactly 5 characters: the "all the same" answer is the right one.
    score <- ifelse(answer == "A, B, and C have the same number of permanent classrooms" & nchar(truth) == 5, TRUE, score)
    # Non-answers are scored as incorrect.
    for (refusal in c("Don?<U+0080><U+0099>t know", "Decline to answer")) {
      score <- ifelse(answer == refusal, FALSE, score)
    }
    score
  })
  
  

  #equals TRUE if the respondent correctly selected the school with the most students
  this.df$test_most_enrollment <- local({
    answer <- this.df$aid_36
    truth <- this.df$max_enrollment_letters
    # Fewer than 5 characters: at most two letters are listed, so a single
    # school answer can be right if its letter is among them.
    some.differ <- nchar(truth) < 5
    score <- FALSE
    for (ltr in c("A", "B", "C")) {
      score <- ifelse(answer == paste("School", ltr) & some.differ, grepl(ltr, truth), score)
    }
    # Exactly 5 characters: the "all the same" answer is the right one.
    score <- ifelse(answer == "A, B, and C have the same number of students" & nchar(truth) == 5, TRUE, score)
    # Non-answers are scored as incorrect.
    for (refusal in c("Don?<U+0080><U+0099>t know", "Decline to answer")) {
      score <- ifelse(answer == refusal, FALSE, score)
    }
    score
  })
  
  #determine the number of students in the school selected above
  this.df$selected_enrollment <- local({
    answer <- this.df$aid_36
    enrolled <- NA
    for (ltr in c("A", "B", "C")) {
      picked <- answer == paste("School", ltr)
      # Checked in max, median, min order; when the letter appears in more than
      # one set (ties), the later check overrides the earlier one.
      enrolled <- ifelse(picked & grepl(ltr, this.df$max_enrollment_letters), this.df$test_enrollment_max, enrolled)
      enrolled <- ifelse(picked & grepl(ltr, this.df$median_enrollment_letters), this.df$test_enrollment_median, enrolled)
      enrolled <- ifelse(picked & grepl(ltr, this.df$min_enrollment_letters), this.df$test_enrollment_min, enrolled)
    }
    # "All the same" answers are compared against the median school.
    ifelse(answer == "A, B, and C have the same number of students", this.df$test_enrollment_median, enrolled)
  })
  
  
  #equals TRUE if the respondent correctly selected the enrollment in the school chosen above
  this.df$test_enrollment_specific <- FALSE
  this.df$test_enrollment_specific <- local({
    guess <- this.df$aid_37
    actual <- this.df$selected_enrollment
    verdict <- this.df$test_enrollment_specific
    # Closed answer bands; both end points count as inside the band.
    band.label <- c("Between 100 and 300", "Between 300 and 500", "Between 500 and 1000",
                    "Between 1000 and 1500", "Between 1500 and 2000")
    band.low <- c(100, 300, 500, 1000, 1500)
    band.high <- c(300, 500, 1000, 1500, 2000)
    for (k in seq_along(band.label)) {
      verdict <- ifelse(guess == band.label[k] & (actual >= band.low[k] & actual <= band.high[k]), TRUE, verdict)
    }
    verdict <- ifelse(guess == "More than 2000" & (actual > 2000), TRUE, verdict)
    # Non-answers to this question, then blank/missing answers, then
    # non-answers to the preceding school question, are all scored FALSE.
    for (refusal in c("Don?<U+0080><U+0099>t know", "Decline to answer")) {
      verdict <- ifelse(guess == refusal, FALSE, verdict)
    }
    verdict <- ifelse(guess == "" | is.na(guess), FALSE, verdict)
    for (refusal in c("Don?<U+0080><U+0099>t know", "Decline to answer")) {
      verdict <- ifelse(this.df$aid_36 == refusal, FALSE, verdict)
    }
    verdict
  })
  
  #calculate the difference between guess and reality
  this.df$test_enrollment_diff <- NA
  this.df$test_enrollment_diff <- local({
    guess <- this.df$aid_37
    actual <- this.df$selected_enrollment
    miss <- this.df$test_enrollment_diff
    band.label <- c("Between 100 and 300", "Between 300 and 500", "Between 500 and 1000",
                    "Between 1000 and 1500", "Between 1500 and 2000")
    band.low <- c(100, 300, 500, 1000, 1500)
    band.high <- c(300, 500, 1000, 1500, 2000)
    band.mid <- c(200, 400, 750, 1250, 1750)
    for (k in seq_along(band.label)) {
      # Distance from the band's midpoint to the true value ...
      miss <- ifelse(guess == band.label[k], (band.mid[k] - actual), miss)
      # ... replaced by 0 when the true value lies inside the chosen band.
      miss <- ifelse(guess == band.label[k] & (actual >= band.low[k] & actual <= band.high[k]), 0, miss)
    }
    # The open-ended top band is measured from 2000.
    miss <- ifelse(guess == "More than 2000", (2000 - actual), miss)
    miss <- ifelse(guess == "More than 2000" & (actual >= 2000), 0, miss)
    miss
  })
  
  
  #equals TRUE if the respondent correctly guessed the school with the least percent votes
  this.df$test_least_percentvotes <- local({
    answer <- this.df$aid_38
    truth <- this.df$min_percentvotes_letters
    # Fewer than 5 characters: at most two letters are listed, so a single
    # school answer can be right if its letter is among them.
    some.differ <- nchar(truth) < 5
    score <- FALSE
    for (ltr in c("A", "B", "C")) {
      score <- ifelse(answer == paste("School", ltr) & some.differ, grepl(ltr, truth), score)
    }
    # Exactly 5 characters: the "all the same" answer is the right one.
    score <- ifelse(answer == "I received the same percentage of the votes in the areas around all three schools" & nchar(truth) == 5, TRUE, score)
    # Non-answers are scored as incorrect.
    for (refusal in c("Don?<U+0080><U+0099>t know", "Decline to answer")) {
      score <- ifelse(answer == refusal, FALSE, score)
    }
    score
  })
  
  #determine the percent votes in the school selected above
  this.df$selected_percentvotes <- local({
    answer <- this.df$aid_38
    share <- NA
    for (ltr in c("A", "B", "C")) {
      picked <- answer == paste("School", ltr)
      # Checked in max, median, min order; when the letter appears in more than
      # one set (ties), the later check overrides the earlier one.
      share <- ifelse(picked & grepl(ltr, this.df$max_percentvotes_letters), this.df$test_percentvotes_max, share)
      share <- ifelse(picked & grepl(ltr, this.df$median_percentvotes_letters), this.df$test_percentvotes_median, share)
      share <- ifelse(picked & grepl(ltr, this.df$min_percentvotes_letters), this.df$test_percentvotes_min, share)
    }
    # "All the same" answers are compared against the median school.
    ifelse(answer == "I received the same percentage of the votes in the areas around all three schools", this.df$test_percentvotes_median, share)
  })
  # Inspection leftover: lists the answers that produced no selected value. Its
  # value is not stored, and inside a for loop it is not printed either.
  this.df[is.na(this.df$selected_percentvotes),]$aid_38
  
  #equals TRUE if the respondent correctly selected the percent votes in the school chosen above
  # test_percentvotes_specific: TRUE when the vote share of the selected school
  #   lies inside the band the respondent chose in aid_39.
  # test_percentvotes_diff: band midpoint minus the true share, or 0 when the
  #   true share lies inside the chosen band; NA when no band was chosen.
  # The band list previously jumped from "20 and 30%" to "40 and 50%"; the
  # 30-40% band is included so that such an answer, if it occurs in the data,
  # is scored like every other band. Rows with other answers are unaffected.
  this.df[c("test_percentvotes_specific", "test_percentvotes_diff")] <- local({
    guess <- this.df$aid_39
    actual <- this.df$selected_percentvotes
    correct <- FALSE
    miss <- NA

    # Closed bands; both end points count as inside the band.
    band.label <- c("Between 10 and 20%", "Between 20 and 30%", "Between 30 and 40%",
                    "Between 40 and 50%", "Between 50 and 60%", "Between 60 and 70%")
    band.low <- c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6)
    band.high <- c(0.2, 0.3, 0.4, 0.5, 0.6, 0.7)
    band.mid <- c(0.15, 0.25, 0.35, 0.45, 0.55, 0.65)
    for (k in seq_along(band.label)) {
      chose <- guess == band.label[k]
      inside <- chose & (actual >= band.low[k] & actual <= band.high[k])
      correct <- ifelse(inside, TRUE, correct)
      miss <- ifelse(chose, band.mid[k] - actual, miss)
      miss <- ifelse(inside, 0, miss)
    }

    # Open-ended bands, measured from 0.05 and 0.8 respectively.
    inside <- guess == "Less than 10%" & (actual < 0.1)
    correct <- ifelse(inside, TRUE, correct)
    miss <- ifelse(guess == "Less than 10%", 0.05 - actual, miss)
    miss <- ifelse(inside, 0, miss)

    inside <- guess == "More than 70%" & (actual > 0.7)
    correct <- ifelse(inside, TRUE, correct)
    miss <- ifelse(guess == "More than 70%", 0.8 - actual, miss)
    miss <- ifelse(inside, 0, miss)

    # Non-answers to this question, blank/missing answers, and non-answers to
    # the preceding school question (aid_38) are all scored FALSE.
    for (refusal in c("Don?<U+0080><U+0099>t know", "Decline to answer")) {
      correct <- ifelse(guess == refusal, FALSE, correct)
    }
    correct <- ifelse(guess == "" | is.na(guess), FALSE, correct)
    for (refusal in c("Don?<U+0080><U+0099>t know", "Decline to answer")) {
      correct <- ifelse(this.df$aid_38 == refusal, FALSE, correct)
    }

    list(correct, miss)
  })
  
  
  #equals TRUE if the respondent correctly guessed the school with the most aid projects
  # Scored against max_projects_letters (the school(s) with the MOST projects).
  # The earlier version compared against min_projects_letters, i.e. it rewarded
  # picking the school with the fewest projects.
  this.df$test_most_projects <- local({
    answer <- this.df$aid_40
    truth <- this.df$max_projects_letters
    # Fewer than 5 characters: at most two letters are listed, so a single
    # school answer can be right if its letter is among them.
    some.differ <- nchar(truth) < 5
    score <- FALSE
    for (ltr in c("A", "B", "C")) {
      score <- ifelse(answer == paste("School", ltr) & some.differ, grepl(ltr, truth), score)
    }
    # All three letters listed (5 characters) means a three-way tie. "All
    # received projects" is only right if that tie is above zero; a tie at
    # zero is the "None" answer handled on the next line.
    score <- ifelse(answer == "All these schools received projects" & nchar(truth) == 5 & this.df$test_projects_max > 0, TRUE, score)
    score <- ifelse(answer == "None of these schools received projects" & this.df$test_projects_max == 0, TRUE, score)
    # Non-answers are scored as incorrect.
    for (refusal in c("Don?<U+0080><U+0099>t know", "Decline to answer")) {
      score <- ifelse(answer == refusal, FALSE, score)
    }
    score
  })
  
  #code specific donor question
  # test_donor_specific: 1 if the donor named in aid_41 matches any donor
  #   attached to the test-map schools (test_donors, comma-separated), 0 if
  #   not, NA if aid_41 is missing or if the map has no donors and the
  #   respondent named none of the major donors.
  # nodonor: TRUE when the map schools carry no donor at all.
  this.df$test_donor_specific <- 0

  # Harmonise common aliases in the respondent's answer (kept in the output).
  this.df$aid_41 <- sub("world food programme", "WFP", this.df$aid_41, ignore.case = TRUE)
  this.df$aid_41 <- sub("world food program", "WFP", this.df$aid_41, ignore.case = TRUE)
  this.df$aid_41 <- sub("Germany", "GIZ", this.df$aid_41, ignore.case = TRUE)
  this.df$aid_41 <- sub("German", "GIZ", this.df$aid_41, ignore.case = TRUE)
  this.df$aid_41 <- sub("England", "DFID", this.df$aid_41, ignore.case = TRUE)

  # Split the donor string into clean, upper-cased tokens per respondent.
  # - "programme" is handled as well as "program", so the token becomes "WFP"
  #   rather than "WFPme".
  # - Tokens are trimmed BEFORE empties are dropped; a blank-only token would
  #   otherwise turn into the empty pattern, which matches every answer.
  this.df$test_donors_split <- lapply(strsplit(this.df$test_donors, ","), function(tok) {
    tok <- sub("world food program(me)?", "WFP", tok, ignore.case = TRUE)
    tok <- trimws(tok, which = "both")
    toupper(tok[!is.na(tok) & tok != ""])
  })

  # A donor counts as named when it occurs anywhere in the answer, ignoring
  # case. Matching is literal (fixed = TRUE on upper-cased text) so that
  # characters such as "(", "." or "+" in a donor name are not read as
  # regular-expression syntax.
  this.df$test_donor_specific <- vapply(seq_len(nrow(this.df)), function(r) {
    if (is.na(this.df$aid_41[r])) return(NA_real_)
    answer <- toupper(this.df$aid_41[r])
    donors <- this.df$test_donors_split[[r]]
    as.numeric(any(vapply(donors, grepl, logical(1), x = answer, fixed = TRUE)))
  }, numeric(1))

  this.df$nodonor <- lengths(this.df$test_donors_split) == 0

  #if there are no donors in the map, and the incumbents did not name any major donors, then set to NA 
  this.df$test_donor_specific[this.df$nodonor & !grepl("WFP|UNICEF|DFID|GIZ", this.df$aid_41, ignore.case = TRUE)] <- NA

  # Drop the working columns before the data frame is written back.
  this.df$test_donors_split <- NULL
  this.df$test_donors <- NULL


  # Store the scored data frame under its original name.
  assign(this.dfname, this.df )

}
###########################################################################
# Create a quiz data frame with a single row for each question on each test
# map: seven rows per constituency in mp.survey (mp = 1), followed by seven
# rows per ward in c.survey (mp = 0). `correct` is the coded test variable
# taken from the first survey row for that map. school_id and the school
# coordinates are filled in for every question except test_donor_specific,
# which is about all schools on the map and therefore has no single school.

# Column lookup by a name held in a variable. exact = FALSE keeps the partial
# name matching of `$`, so these lookups behave like literal `$` calls.
quiz_col = function(df, name) df[[name, exact = FALSE]]

# Ranking questions. The school the question is about is the one whose
# order.col equals target.rank (1 for the "least" questions, 3 for the "most"
# questions). `type` is both the question label written to quiz.df and the
# name of the survey column holding the coded answer.
rank.questions = list(
  list(id = 1, type = "test_least_classes",
       order.col = "test_classes_order", target.rank = 1),
  list(id = 2, type = "test_most_enrollment",
       order.col = "test_enrollment_order", target.rank = 3),
  list(id = 3, type = "test_least_percentvotes",
       order.col = "test_percentvotes_order", target.rank = 1),
  list(id = 4, type = "test_most_projects",
       order.col = "test_projects_order", target.rank = 3)
)

# Questions about one lettered school. letter.col is the survey column that
# holds the letter ("School A", "School B" or "School C").
letter.questions = list(
  list(id = 6, type = "test_enrollment_specific", letter.col = "aid_36"),
  list(id = 7, type = "test_percentvotes_specific", letter.col = "aid_38")
)

# Build the seven quiz rows for one test map.
#   key.col        name of the map id column ("constituencyid" or "ps_ward_id")
#   key.value      id of the map to build rows for
#   mp             value written to the mp column (1 for mp.survey, 0 for c.survey)
#   survey         survey data frame holding the coded test variables
#   schools        data frame describing the schools shown on the test maps
#   letter.schools named list ("School A", "School B", "School C") of data
#                  frames, each giving the school behind that letter per map
# Returns a list of seven one-row data frames in question order (1 to 7).
# Stops if a lookup that has to identify exactly one school finds zero or
# several rows.
quiz_rows_for_map = function(key.col, key.value, mp, survey, schools, letter.schools) {
  answers = survey[quiz_col(survey, key.col) == key.value, ]
  in.map = quiz_col(schools, key.col) == key.value

  # The map's own id comes from the loop value; the other id is read from the
  # first matching survey row.
  ids = list(constituencyid = answers$constituencyid[1],
             ps_ward_id = answers$ps_ward_id[1])
  ids[[key.col]] = key.value

  # One quiz row. Column order matches the layout written to quiz.csv.
  quiz_row = function(id, type, school_id = NA, latitude = NA, longitude = NA) {
    data.frame(
      question_id = id,
      school_id = school_id,
      question_type = type,
      constituencyid = ids$constituencyid,
      ps_ward_id = ids$ps_ward_id,
      correct = quiz_col(answers, type)[1],
      mp = mp,
      school_latitude = latitude,
      school_longitude = longitude,
      row.names = NULL,
      stringsAsFactors = FALSE
    )
  }

  rank.rows = lapply(rank.questions, function(q) {
    target = schools[in.map & quiz_col(schools, q$order.col) == q$target.rank, ]
    if (nrow(target) != 1) {
      stop("expected exactly one school with ", q$order.col, " == ",
           q$target.rank, " for ", key.col, " ", key.value, ", found ",
           nrow(target), call. = FALSE)
    }
    # Coordinates are those of the target school itself. The disabled
    # coordinate adjustment (see file header) averaged coordinates over tied
    # schools: over two schools when the median equalled the min ("least"
    # questions) or the max ("most" questions), and over all three schools
    # when max equalled min.
    quiz_row(q$id, q$type, target$school_id,
             target$school_latitude, target$school_longitude)
  })

  # This question is about all schools on the map, so there is no school id.
  donor.row = quiz_row(5, "test_donor_specific")

  letter.rows = lapply(letter.questions, function(q) {
    school.letter = quiz_col(answers, q$letter.col)[1]
    school.id = NA
    # %in% is FALSE for a missing or unrecognised letter, which leaves the
    # school id and the coordinates NA instead of stopping the script.
    if (school.letter %in% names(letter.schools)) {
      lookup = letter.schools[[school.letter]]
      school.id = lookup[quiz_col(lookup, key.col) == key.value, ]$school_id
      if (length(school.id) != 1) {
        stop("expected exactly one ", school.letter, " for ", key.col, " ",
             key.value, ", found ", length(school.id), call. = FALSE)
      }
    }
    chosen = schools[schools$school_id == school.id, ]
    quiz_row(q$id, q$type, school.id,
             chosen$school_latitude[1], chosen$school_longitude[1])
  })

  c(rank.rows, list(donor.row), letter.rows)
}

# MP maps, one per constituency.
letter.schools.mp = list("School A" = test.schools.mpA,
                         "School B" = test.schools.mpB,
                         "School C" = test.schools.mpC)
mp.rows = lapply(unique(mp.survey$constituencyid), function(this.const) {
  quiz_rows_for_map("constituencyid", this.const, mp = 1, survey = mp.survey,
                    schools = test.schools.mp, letter.schools = letter.schools.mp)
})

# Ward maps, one per ward.
letter.schools.lc = list("School A" = test.schools.lcA,
                         "School B" = test.schools.lcB,
                         "School C" = test.schools.lcC)
c.rows = lapply(unique(c.survey$ps_ward_id), function(this.ward) {
  quiz_rows_for_map("ps_ward_id", this.ward, mp = 0, survey = c.survey,
                    schools = test.schools.lc, letter.schools = letter.schools.lc)
})

# Flatten to one list of one-row data frames (MP rows first, then ward rows)
# and bind them in that order, so row numbering runs 1..N across both surveys.
quiz.df = do.call(rbind, unlist(c(mp.rows, c.rows), recursive = FALSE))



#############################


# Write the question-level quiz data and the four survey data frames to
# ./output/. Each list name is the destination path of the data frame it holds;
# files are written in the order listed.
csv.outputs = list(
  "./output/quiz.csv" = quiz.df,
  "./output/mp_all_withtests.csv" = mp.survey.all,
  "./output/mp_withtests.csv" = mp.survey,
  "./output/c_all_withtests.csv" = c.survey.all,
  "./output/c_withtests.csv" = c.survey
)
for (csv.path in names(csv.outputs)) {
  write.csv(csv.outputs[[csv.path]], csv.path)
}



